knitr::opts_chunk$set(message =FALSE, warning =FALSE)if (!require("pacman")) install.packages("pacman")# use this line for installing/loading pacman::p_load(tidyverse, palmerpenguins, here, cowgrid, dsbox, fs, janitor, scales, ggforce, glue, jpeg, png, grid, cowplot) install.packages("openintro")
package 'openintro' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\matto\AppData\Local\Temp\RtmpE9OGSW\downloaded_packages
install.packages("ggridges")
package 'ggridges' successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\matto\AppData\Local\Temp\RtmpE9OGSW\downloaded_packages
# Reading in the data and pivoting the data to a longer format. This will help with the visualization.income_data <-read_csv(here("data", "income.csv"))income_data_longer <-pivot_longer( income_data,cols =c("Rent", "Food", "Clothes", "Tax", "Other"),names_to ="Category",values_to ="Percentage") # Modifies the Category order to matchincome_data_longer$Category <-factor(income_data_longer$Category,levels =c("Other", "Tax", "Clothes","Food", "Rent"))y_axis_relabel <-c("$100-200"="$100-300 ($139.1)","$200-300"="$200-300 ($249.45)","$300-400"="$300-400 ($335.66)","$400-500"="$400-500 ($433.82)","$500-750"="$500-$700 ($547)","$750-1000"="$750-1000 ($880)","$1000 AND OVER"="$1000\nAND OVER ($1125)")# Adds the background. background_img <-readJPEG(here("images", "parchment_paper.jpg"))ggplot(income_data_longer, aes(x = Percentage, y = Class, fill = Category)) +annotation_raster(background_img, xmin =-Inf, xmax =Inf, ymin =-Inf, ymax =Inf) +geom_bar(stat ="identity", width =0.4) +scale_y_discrete(limits =rev(c("$100-200", "$200-300", "$300-400", "$400-500","$500-750", "$750-1000", "$1000 AND OVER" )),labels = y_axis_relabel) +scale_fill_manual(values =c("Rent"="black","Food"="purple","Clothes"="red","Tax"="blue","Other"="gray" )) +geom_text(aes(label =case_when(Percentage <1~"", TRUE~paste0(Percentage, "%"))),color ="white",fontface ="bold",size =2.5,position =position_stack(vjust =0.5) ) +labs(y =NULL,x =NULL ) +theme_minimal() +coord_cartesian(clip ="off") +scale_x_continuous(limits =c(0, 100), expand =c(0,0)) +theme(panel.grid.major =element_blank(),panel.grid.minor =element_blank(),axis.text.y =element_text(hjust =0),axis.title.y =element_text(angle =0, size =10, hjust =-2),axis.text.x =element_blank(),legend.position ="top",legend.title =element_blank(),legend.direction ="horizontal" ) +annotate("text",x =0,y =7.5,label ="Class (Actual Average)",hjust =1,size =3)
#I ran into the issue of the background not covering everything, so I used cowplot to help.
I used https://www.geeksforgeeks.org/r-language/how-to-add-image-to-ggplot2-in-r/ to help figure out how to add the background image.
2 - COVID survey - interpret
This plot is a visual of Covid survey responses regarding attitudes toward COVID-19 vaccines. The x-axis shows the Likert scale from 1-5 for each question, 1 being most agreeable and 5 being least agreeable. The y-axis contains all of the different groups categorized by varying demographics (age, gender, profession, etc.). I do agree with some of these results intuitively. For example, I expected that anyone with a background in medicine would view the vaccine in a positive way. As expected, those in the medical profession ranked many of the questions on the Likert scale closer to a 1 or a 2. I was a little surprised to see that, when comparing medical and nursing results on the question “I will recommend the vaccine…”, that there was a bit more variance on the medical profession. I expected the medical profession and nursing profession to be very very close in scoring. I also expected that the younger generation would rate the vaccine as safer and higher confidence, which turned out to be true in the data, especially compared to older ages.
3 - COVID survey - reconstruct
raw_data <-read_csv(here("data", "covid-survey.csv"), skip =1)view(raw_data)clean_raw_data <- raw_data |>filter(!if_all(-response_id, is.na))view(clean_raw_data)relabel_data <- clean_raw_data |>mutate(exp_already_vax =case_when(exp_already_vax ==0~"No", exp_already_vax ==1~"Yes", TRUE~NA_character_),exp_flu_vax =case_when(exp_flu_vax ==0~"No", exp_flu_vax ==1~"Yes", TRUE~NA_character_),exp_profession =case_when(exp_profession ==0~"Medical", exp_profession ==1~"Nursing", TRUE~NA_character_),exp_gender =case_when(exp_gender ==0~"Male", exp_gender ==1~"Female", exp_gender ==3~"Non-binary third gender", exp_gender ==4~"Prefer not to say", TRUE~NA_character_),exp_race =case_when(exp_race ==1~"American Indian / Alaskan Native", exp_race ==2~"Asian", exp_race ==3~"Black / African American", exp_race ==4~"Native Hawaiian / Other Pacific Islander", exp_race ==5~"White", TRUE~NA_character_),exp_ethnicity =case_when(exp_ethnicity ==1~"Hispanic / Latino", exp_ethnicity ==2~"Non-Hispanic / Non-Latino", TRUE~NA_character_),exp_age_bin =case_when( exp_age_bin ==0~"<20", exp_age_bin ==20~"21-25", exp_age_bin ==25~"26-30", exp_age_bin ==30~">30", TRUE~NA_character_), )view(relabel_data)# The first pivot takes all columns starting with exp_ and groups them into two new columns# called explanatory and explanatory_value. Each explanatory variable is now its own row entry# The second pivot now modifies the columns with "resp_" to do something similar. It groups the# output of the first pivot into multiple rows based on resp.covid_survey_longer <- relabel_data |>pivot_longer(cols =starts_with("exp_"),names_to ="explanatory",values_to ="explanatory_value" ) |>filter(!is.na(explanatory_value)) |>pivot_longer(cols =starts_with("resp_"),names_to ="response",values_to ="response_value" )covid_survey_longer
response_labels <-c("resp_safety"="Based on my understanding,\nI believe the vaccine is safe","resp_confidence_science"="I am confident in the scientific\nvetting process for the new COVID vaccines","resp_feel_safe_at_work"="Getting the vaccine will\nmake me feel safer at work","resp_will_recommend"="I will recommend the vaccine\nto family, friends, and community members","resp_trust_info"="I trust the information\nthat I have received about the vaccines","resp_concern_safety"="I am concerned about the safety\nand side effects of the vaccine")explanatory_labels <-c("All"="All","exp_age_bin"="Age","exp_gender"="Gender","exp_race"="Race","exp_ethnicity"="Ethnicity","exp_profession"="Profession","exp_already_vax"="Had COVID vaccine","exp_flu_vax"="Had flu vaccine this year")# Plot code belowggplot(covid_survey_summary_stats, aes(x = mean, y = explanatory_value)) +geom_point(size =0.7) +geom_errorbarh(aes(xmin = low, xmax = high),height =0.3,size =0.3 ) +facet_grid(explanatory ~ response,scales ="free_y",space ="free_y",labeller =labeller(explanatory =as_labeller(explanatory_labels, label_wrap_gen(15)),response =as_labeller(response_labels, label_wrap_gen(20)) )) +labs(x ="Mean Likert score\n(Error bars range from 10th to 90th percentile)",y =NULL ) +theme(strip.background =element_rect(fill ="gray90", color ="black", size =0.1),axis.text.y =element_text(size =5),strip.text.y =element_text(size =5, angle =0),strip.text.x =element_text(size =5),panel.spacing =unit(.1, "lines"),panel.grid.major =element_blank(),panel.grid.minor =element_blank(),axis.title.x =element_text(size =8),axis.text.x =element_text(size =6) )